rm(list=ls())
library(piecewiseSEM)
library(nlme) # Version 3.1.122
library(ggplot2)
library(corrplot)
library(car) #VIF calculations
library(vegan)

# Read-in organized data
setwd("~/Analyses/_RESULTS/SES-SEM/__Organized Data")
alldat.site<-read.csv("data_wakatobi_allDataMerged_10kmHumans.csv")

############################################################################################################
## The following adds flexibility to choose different response variables (fish ecology metrics) for the SEM
############################################################################################################
responseDF<-as.data.frame(cbind(fish.response=c("log_biomass_g", "biomass_g", "no_of_species", "shannon", 
                                                "invsimpson", "SimpsonEvenness"),
                                fish.title=c("log Total Biomass (g)", "Total Biomass (g)", "Richness", 
                                             "Shannon Diversity (H')", "Inverse Simpson's Diversity (D2)", 
                                             "Simpson's Evenness (E)" )))
## Identify fish response column name and title
## CHOICES:
## for biomass, set as biomass_g
## for log biomass, set as log_biomass_g
## for species richness, set as no_of_species
## for shannon diversity, set as shannon
## for inverse simpson's, set as invsimpson
## for simpson's evenness, set as SimpsonEvenness
fish.col<-"biomass_g" # Set response here
fish.row<-responseDF$fish.response %in% fish.col
fish.title<-as.character(responseDF[fish.row, "fish.title"])

# NEXT: Plot scatterplots
# First, create df scatter.final with all response + predictor variables + hierarchical variables
otherresponse<-responseDF$fish.response[!responseDF$fish.response %in% fish.col]
nonpredictors<-names(alldat.site) %in% c("site_id", "Site.Name", as.character(otherresponse))
scatter.final<-alldat.site[,!nonpredictors]
loc.col<-grep("location", names(scatter.final))
response.col<-grep(fish.col, names(scatter.final))
scatter.names<-names(scatter.final)[-c(loc.col, response.col)]

########################################################################################
########################################################################################
########################################################################################
# Set graph names here: should match object scatter.names
scatter.titles<-c( # Site metadata
"Latitude", "Longitude", "Exposure", "Visibility", "Reef Type",  
                  
# Fish Landings (catch) data
"Mean Landings", "Mean Personal", "Mean Pengumpul", "Mean Papalele", "Mean Market", "Mean On-Island", 
"Total Landings", "Total Personal", "Total Pengumpul", "Total Papalele", "Total Market", "Total On-Island", 
"Proportion Personal", "Proportion Pengumpul", "Proportion Papalele", "Proportion Market", "Proportion On-Island",
                  
# Benthic cover data
"Rugosity",
"Dead Coral With Algae", "Macroalgae", "Rubble", "Rock", "Soft Coral", "Sand", 
"All Hard Coral", "All Abiotic Substrate",
                  
# Oceanography data (from MSEC)
"NPP (Mean)", "NPP (Min)", "NPP (Max)", "NPP (SD)", "NPP (Interannual SD)",
"Reef Area within 5km", 
"Wave Energy (Mean)", "Wave Energy (SD)", "Wave Energy (Interannual SD)", 
"Wind Fetch", 

# SST data
"SST SD", "SST 50th Percentile", "SST 98th Percentile", "SST 2nd Percentile", "SST Kurtosis", "SST Skewness",
                  
# Human Population data
"Population", "Fishers", "Rowboats", "Motorboats"
)
# Create scatterplots
for(i in 1:length(scatter.names))
{
  newfile=paste("plot_scatter_", fish.col, "_vs_", scatter.names[i], ".pdf", sep="")
  p<-ggplot(data=scatter.final, aes(x=get(scatter.names[i]), y=get(fish.col))) + 
    geom_point(aes(x=get(scatter.names[i]), y=get(fish.col), color=location, shape=type_reef), size=2) +
    labs(y=fish.title, x=scatter.titles[i]) +  
    theme_light()+
    theme(text = element_text(size=18), 
          legend.position="right")
  

  print(p)      
}

##### CALCULATE CORRELATIONS and VIF
##### NOTE: Correlations and VIF are unaffected by scaling (which preserves the shape and spread of data)
##### i.e., scaling can wait until the last step

##### Remove discreet variables
discreetvar<-match(c("wave_wind_fetch", "type_reef", "exposed"), scatter.names)
corr.names<-scatter.names[-discreetvar]
corr.final<-scatter.final[corr.names]

##### Rename rows and columns for correlation plots
discreetvar.titles<-match(c("Wind Fetch", "Reef Type", "Exposure"), scatter.titles)
corr.titles<-scatter.titles[-discreetvar.titles]
colnames(corr.final)<-corr.titles


cor.dat<-cor(corr.final)
write.csv(cor.dat, file="_Table_CorrelationsPearson.csv")
cor.test<-abs(cor.dat)>0.5
write.csv(cor.test, file="_Table_CorrelationsTestPearson.csv")

# Spearman's for all ranked, ordered vars (e.g., DACOR data)
cor.spear<-cor(corr.final, method="spearman")
write.csv(cor.spear, file="_Table_CorrelationsSpearman.csv")
cor.spear.test<-abs(cor.spear)>0.5
write.csv(cor.spear.test, file="_Table_CorrelationsTestSpearman.csv")

# Generate p values and confidence intervals for each correlation pair
pvals<-cor.mtest(corr.final, conf.level=0.95)
corrplot(cor.dat, method="color", tl.col="black", tl.cex=0.5, number.cex=0.4, p.mat=pvals$p, sig.level=0.05, insig="blank", cl.align.text="r", addgrid.col="grey")

### Test for MULTICOLLINEARITY (calculate VIF)
### Equations for MULTICOLLINEARITY can be recycled as PSEM equations
analysis.col<-grep(fish.col, names(alldat.site))

### FIRST, construct simple model that only uses site-level data (i.e., no fish landings data)
form1a<-as.formula(paste(names(alldat.site)[analysis.col], " ~ All_HardCoral + reef_area_5km", sep=""))
form1b<-as.formula("All_HardCoral ~ SST_98perc + Population_2017")

fit1a <- lm(form1a, data=alldat.site)
fit1b <- lm(form1b, data=alldat.site)

# Note: only need to calculate vif for formulas with at least two predictors
vif(fit1a)
## All_HardCoral reef_area_5km 
##      1.243295      1.243295
vif(fit1b)
##      SST_98perc Population_2017 
##        1.091423        1.091423
################################################################################
################################################################################
# STRUCTURAL EQUATION MODELS
################################################################################
################################################################################
# PSEM of site-level data only (i.e., no fish landings data)

### SUBSET only model variables from alldat
all.forms_1<-ls(pattern="form1")
all.vars_1<-all.vars(get(all.forms_1[1]))
for (i in 2:length(all.forms_1))
{
  vars_i<-all.vars(get(all.forms_1[i]))
  all.vars_1<-append(all.vars_1, vars_i)    
}
model.vars_1<-unique(all.vars_1)

sem.vars.site<-alldat.site[c("location", "type_reef", model.vars_1)]
sem.site.scaled<-as.data.frame(apply(sem.vars.site[model.vars_1], 2, scale))
sem.site.scaled<-cbind(sem.vars.site$location, sem.vars.site$type_reef, sem.site.scaled)
names(sem.site.scaled)[1:2]<- c("location", "reef_type")

waka.sitelevel.psem<-psem(lm(form1a, data=sem.site.scaled), lm(form1b, data=sem.site.scaled)); 
summary(waka.sitelevel.psem, .progressBar = F, digits=2)
## 
## Structural Equation Model of waka.sitelevel.psem 
## 
## Call:
##   biomass_g ~ All_HardCoral + reef_area_5km
##   All_HardCoral ~ SST_98perc + Population_2017
## 
##     AIC      BIC
##  20.646   28.202
## 
## ---
## Tests of directed separation:
## 
##                          Independ.Claim Estimate Std.Error DF Crit.Value
##   All_HardCoral  ~  reef_area_5km + ...  -0.1975    0.2907 15    -0.6792
##          biomass_g  ~  SST_98perc + ...   0.0340    0.2255 15     0.1507
##     biomass_g  ~  Population_2017 + ...  -0.3475    0.2708 15    -1.2833
##   P.Value 
##    0.5074 
##    0.8822 
##    0.2189 
## 
## Global goodness-of-fit:
## 
##   Fisher's C = 4.646 with P-value = 0.59 and on 6 degrees of freedom
## 
## ---
## Coefficients:
## 
##        Response       Predictor Estimate Std.Error DF Crit.Value P.Value
##       biomass_g   All_HardCoral   0.2598    0.2194 16     1.1842  0.2536
##       biomass_g   reef_area_5km   0.6860    0.2194 16     3.1264  0.0065
##   All_HardCoral      SST_98perc  -0.3167    0.2179 16    -1.4533  0.1655
##   All_HardCoral Population_2017   0.3686    0.2179 16     1.6912  0.1102
##   Std.Estimate   
##         0.2598   
##         0.6860 **
##        -0.3167   
##         0.3686   
## 
##   Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05
## 
## Individual R-squared:
## 
##        Response method R.squared
##       biomass_g   none      0.38
##   All_HardCoral   none      0.30
## Site-level PSEM with reef_type hierarchy
waka.sitelevel.reeftype.psem<-psem(lme(form1a, random = ~ 1 | reef_type, data=sem.site.scaled), 
                      lme(form1b, random = ~ 1 | reef_type, data=sem.site.scaled))

summary(waka.sitelevel.reeftype.psem, .progressBar = F)
## 
## Structural Equation Model of waka.sitelevel.reeftype.psem 
## 
## Call:
##   biomass_g ~ All_HardCoral + reef_area_5km
##   All_HardCoral ~ SST_98perc + Population_2017
## 
##     AIC      BIC
##  24.684   34.128
## 
## ---
## Tests of directed separation:
## 
##                          Independ.Claim Estimate Std.Error DF Crit.Value
##   All_HardCoral  ~  reef_area_5km + ...  -0.1975    0.2907 14    -0.6792
##          biomass_g  ~  SST_98perc + ...  -0.0411    0.2272 14    -0.1810
##     biomass_g  ~  Population_2017 + ...  -0.3475    0.2708 14    -1.2833
##   P.Value 
##    0.5081 
##    0.8590 
##    0.2202 
## 
## Global goodness-of-fit:
## 
##   Fisher's C = 4.684 with P-value = 0.585 and on 6 degrees of freedom
## 
## ---
## Coefficients:
## 
##        Response       Predictor Estimate Std.Error DF Crit.Value P.Value
##       biomass_g   All_HardCoral   0.3169    0.2182 15     1.4520  0.1671
##       biomass_g   reef_area_5km   0.5500    0.2462 15     2.2344  0.0411
##   All_HardCoral      SST_98perc  -0.3167    0.2179 15    -1.4533  0.1667
##   All_HardCoral Population_2017   0.3686    0.2179 15     1.6912  0.1115
##   Std.Estimate  
##         0.3169  
##         0.5500 *
##        -0.3167  
##         0.3686  
## 
##   Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05
## 
## Individual R-squared:
## 
##        Response method Marginal Conditional
##       biomass_g   none     0.23        0.40
##   All_HardCoral   none     0.28        0.28